import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '../..'))

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

from python.tools import (
    clean_folder
)

def get_autocorr(df, var_name, fe_var = None):
    """Estimate the first-order autocorrelation coefficient of a variable.

    Fits an OLS regression of ``var_name`` on its lag through the
    statsmodels formula interface and returns the coefficient on the lag.
    The lagged column must already exist in ``df`` under the name
    ``'<var_name>_LAG1'``; this function does not create it.

    Args:
        df (df): Pandas dataframe holding ``var_name``, ``<var_name>_LAG1``
            and, when given, ``fe_var``
        var_name (str): Variable name
        fe_var (str, optional): Name of fixed-effects variable. When given,
            it enters the regression as a categorical term ``C(fe_var)``.

    Returns:
        float: Estimated coefficient on ``<var_name>_LAG1``
    """
    lag_name = '{}_LAG1'.format(var_name)

    # `is None` is the correct identity test for the "no fixed effects"
    # default; `== None` goes through __eq__ and is not reliable for
    # arbitrary objects.
    if fe_var is None:
        formula = '{} ~ {}'.format(var_name, lag_name)
    else:
        formula = '{} ~ C({}) + {}'.format(var_name, fe_var, lag_name)

    mod = smf.ols(formula, data = df).fit()
    return mod.params[lag_name]

################
## Parameters ##
################

# Folder the input CSV datasets are read from. The path is relative, so it is
# resolved against the current working directory at run time.
input_folder = './get_graphs/input'
# Folder the generated LaTeX table is written to (also a relative path).
output_folder = './get_graphs/output/summary_stats'
# Project helper imported from python.tools; presumably prepares/empties the
# output folder before new results are written -- confirm against its definition.
clean_folder(output_folder)

########################
## Consensus forecast ##
########################

# Load the consensus dataset
df = pd.read_csv('{}/consensus_dataset.csv'.format(input_folder))

# Get mask for final sample: keep rows where both the forecast and the
# realization are observed.
mask = df['SPFfor_Step2'].notna() & df['Realiz1'].notna()
# Take an explicit copy so the columns created below are written to an
# independent dataframe instead of to a slice of the raw data (assigning to
# a slice is what triggers pandas' SettingWithCopyWarning).
df = df.loc[mask].copy()

# Create variables
# Forecast error = realization minus forecast
df['Efor_Step2'] = df['Realiz1'] - df['SPFfor_Step2']
# One-row lags, named '<var>_LAG1' as expected by get_autocorr.
# NOTE(review): the lag is the previous row of the *filtered* sample, so it
# assumes rows are in time order and that dropped rows do not leave gaps
# that matter -- confirm against the dataset.
for var_name in ['Realiz1', 'SPFfor_Step2', 'Efor_Step2']:
    df['{}_LAG1'.format(var_name)] = df[var_name].shift(1)

# Calculate summary statistics
# Number of non-missing observations
N_actuals = df['Realiz1'].count()
N_cons = df['SPFfor_Step2'].count()
N_fe_cons = df['Efor_Step2'].count()

# Sample means
mean_actuals = df['Realiz1'].mean()
mean_cons = df['SPFfor_Step2'].mean()
mean_fe_cons = df['Efor_Step2'].mean()

# Sample standard deviations
SD_actuals = df['Realiz1'].std()
SD_cons = df['SPFfor_Step2'].std()
SD_fe_cons = df['Efor_Step2'].std()

# First-order autocorrelation coefficients (no fixed effects)
rho_actuals = get_autocorr(df, 'Realiz1')
rho_cons = get_autocorr(df, 'SPFfor_Step2')
rho_fe_cons = get_autocorr(df, 'Efor_Step2')

##########################
## Individual forecasts ##
##########################

# Load the individual-level dataset
df = pd.read_csv('{}/individual_dataset.csv'.format(input_folder))

# Get mask for final sample: keep rows where both the forecast and the
# realization are observed.
mask = df['SPFfor_Step2'].notna() & df['Realiz1'].notna()
# Take an explicit copy so the columns created below are written to an
# independent dataframe instead of to a slice of the raw data (assigning to
# a slice is what triggers pandas' SettingWithCopyWarning).
df = df.loc[mask].copy()

# Create variables
# Forecast error = realization minus forecast
df['Efor_Step2'] = df['Realiz1'] - df['SPFfor_Step2']
# Lags are taken within each ID group. The regressions below include ID
# fixed effects, so a plain shift over the stacked rows would hand the first
# row of every ID a "lag" that belongs to a different ID. With the grouped
# shift that first row gets a missing lag instead.
# NOTE(review): assumes rows are in time order within each ID -- confirm
# against the dataset.
for var_name in ['Realiz1', 'SPFfor_Step2', 'Efor_Step2']:
    df['{}_LAG1'.format(var_name)] = df.groupby('ID')[var_name].shift(1)

# Calculate summary statistics
# Number of non-missing observations
N_ind = df['SPFfor_Step2'].count()
N_fe_ind = df['Efor_Step2'].count()

# Sample means
mean_ind = df['SPFfor_Step2'].mean()
mean_fe_ind = df['Efor_Step2'].mean()

# Sample standard deviations
SD_ind = df['SPFfor_Step2'].std()
SD_fe_ind = df['Efor_Step2'].std()

# First-order autocorrelation coefficients with ID fixed effects
rho_ind = get_autocorr(df, var_name = 'SPFfor_Step2', fe_var = 'ID')
rho_fe_ind = get_autocorr(df, var_name = 'Efor_Step2', fe_var = 'ID')

######################
## Get LaTeX output ##
######################

# LaTeX skeleton of the summary table. Doubled braces are literal braces for
# str.format; single-brace names are placeholders filled in below.
latex_template = """
\\begin{{tabular}}{{llllll}}
\\toprule
    &  & \\multicolumn{{2}}{{c}}{{Consensus}} & \\multicolumn{{2}}{{c}}{{Individual}} \\\\
\\cmidrule(lr{{0.5em}}){{3-4}} \\cmidrule(lr{{0.5em}}){{5-6}}
    &  Actuals     & Forecasts & Errors            & Forecasts & Errors \\\\
\\midrule
Sample size &  {N_actuals} & {N_cons} & {N_fe_cons} & {N_ind} & {N_fe_ind}  \\\\
Mean & {mean_actuals} & {mean_cons} & {mean_fe_cons} & {mean_ind} & {mean_fe_ind} \\\\
Standard deviation & {SD_actuals} & {SD_cons} & {SD_fe_cons} & {SD_ind} & {SD_fe_ind} \\\\
Autocorrelation & {rho_actuals} & {rho_cons} & {rho_fe_cons} & {rho_ind} & {rho_fe_ind} \\\\
\\bottomrule
\\end{{tabular}}
"""

# Sample sizes are counts and are inserted unchanged.
table_values = {
    'N_actuals': N_actuals,
    'N_cons': N_cons,
    'N_fe_cons': N_fe_cons,
    'N_ind': N_ind,
    'N_fe_ind': N_fe_ind,
}

# Means, standard deviations and autocorrelations are reported with two
# decimals.
two_decimal_stats = {
    'mean_actuals': mean_actuals,
    'mean_cons': mean_cons,
    'mean_fe_cons': mean_fe_cons,
    'mean_ind': mean_ind,
    'mean_fe_ind': mean_fe_ind,
    'SD_actuals': SD_actuals,
    'SD_cons': SD_cons,
    'SD_fe_cons': SD_fe_cons,
    'SD_ind': SD_ind,
    'SD_fe_ind': SD_fe_ind,
    'rho_actuals': rho_actuals,
    'rho_cons': rho_cons,
    'rho_fe_cons': rho_fe_cons,
    'rho_ind': rho_ind,
    'rho_fe_ind': rho_fe_ind,
}
# Round as before, then render with a fixed two-decimal format so trailing
# zeros are kept (0.50 rather than 0.5) and every cell has the same number
# of decimals.
for stat_name, stat_value in two_decimal_stats.items():
    table_values[stat_name] = '{:.2f}'.format(round(stat_value, 2))

latex_template = latex_template.format(**table_values)

# Write the filled-in table to the output folder.
with open('{}/summary_stats.tex'.format(output_folder), "w") as text_file:
    text_file.write(latex_template)
